1. Gastos (cálculos antiguos)

Gastos_casa %>% 
  dplyr::select(-Tiempo,-link) %>%
  dplyr::select(fecha, gasto, monto, gastador,obs) %>% tail(30) %>% 
  knitr::kable(format = "markdown", size=12)
fecha gasto monto gastador obs
25/10/2025 Comida 87042 Tami Supermercado
25/10/2025 Comida 37026 Andrés la burguesia
27/10/2025 Electricidad 48758 Andrés de este mes
28/10/2025 Comida 5970 Andrés lider de 27-10-2025
30/10/2025 Agua 37400 Andrés los ultimos 2 mese que no me han cobrado mas 17 lks por lo que habriamos gastado en junio
1/11/2025 Comida 112523 Tami Supermercado
1/11/2025 Diosi 27990 Andrés arena american litter
3/11/2025 Diosi 29568 Andrés arena diosi 2x 20kg
3/11/2025 Comida 44434 Andrés bolsas caca diosi + soul bar 2x20un
8/11/2025 Diosi 65000 Tami Veterinaria
8/11/2025 Comida 101773 Tami Supermercado
8/11/2025 Comida 20000 Andrés lo saldes tami
15/11/2025 Comida 74078 Tami Supermercado
17/11/2025 VTR 22000 Andrés NA
17/11/2025 Electricidad 42000 Andrés NA
22/11/2025 Comida 6800 Tami Helado/Galleta Valpo
23/11/2025 Comida 48730 Tami Almuerzo Valpo
23/11/2025 Comida 9990 Tami Cafetería/Sanguche Valpo
24/11/2025 Transporte 5195 Tami Uber Reñaca-Terminal Buses Viña
24/11/2025 Transporte 2675 Tami Uber Depto-Mcdonald Reñaca
22/11/2025 Transporte 8193 Tami Uber Viña-Depto Reñaca
22/11/2025 Transporte 9766 Tami Uber depto Reñaca-Valpo
24/11/2025 Transporte 10000 Andrés condor
24/11/2025 Comida 38290 Andrés barra central
23/11/2025 Comida 17200 Andrés NA
23/11/2025 Comida 18766 Andrés NA
20/11/2025 Comida 22258 Andrés lider
24/11/2025 Comida 55094 Tami Supermercado
31/3/2019 Comida 9000 Andrés NA
8/9/2019 Comida 24588 Andrés Super Lider

#para ver las diferencias depués de la diosi
Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(gastador=ifelse(gastador=="Andrés",1,0)) %>%
    dplyr::group_by(gastador, fecha,.drop = F) %>% 
    dplyr::summarise(gasto_media=mean(monto,na.rm=T)) %>% 
    dplyr::mutate(treat=ifelse(fecha>"2019-W26",1,0)) %>%
    #dplyr::mutate(fecha_simp=lubridate::week(fecha)) %>%#después de  diosi. Junio 24, 2019 
    dplyr::mutate(gastador_nombre=plyr::revalue(as.character(gastador), c("0" = "Tami", "1"="Andrés"))) %>% 
    assign("ts_gastos_casa_week_treat", ., envir = .GlobalEnv) 

gplots::plotmeans(gasto_media ~ gastador_nombre, main="Promedio de gasto por gastador", data=ts_gastos_casa_week_treat,ylim=c(0,75000), xlab="", ylab="")

library(ggplot2)

ggplot(
  ts_gastos_casa_week_treat,
  aes(x = gastador_nombre, y = gasto_media)
) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
  facet_wrap(~ treat, labeller = labeller(
    treat = c(`0` = "Antes de Diosi", `1` = "Después de Diosi")
  )) +
  coord_cartesian(ylim = c(0, 70000)) +
  labs(x = "", y = "") +
  theme_minimal(base_size = 13) +
  theme(
    strip.text = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

library(ggiraph)
library(scales)
#if( requireNamespace("dplyr", quietly = TRUE)){
gg <- Gastos_casa %>%
  dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
  dplyr::mutate(gastador=ifelse(gastador=="Andrés",1,0)) %>%
  dplyr::mutate(fecha_simp=tsibble::yearweek(fecha)) %>%
  dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
  dplyr::mutate(treat=ifelse(fecha_week>"2019 W26",1,0)) %>%
  dplyr::mutate(gastador_nombre=plyr::revalue(as.character(gastador), c("0" = "Tami", "1"="Andrés"))) %>% 
#  dplyr::mutate(week=as.Date(as.character(lubridate::floor_date(fecha, "week"))))%>%
  #dplyr::mutate(fecha_week= lubridate::parse_date_time(fecha_week, c("%Y-W%V"),exact=T)) %>% 
  dplyr::group_by(gastador_nombre, fecha_simp) %>%
  dplyr::summarise(monto_total=sum(monto)) %>%
  dplyr::mutate(tooltip= paste0(substr(gastador_nombre,1,1),"=",round(monto_total/1000,2))) %>%
  ggplot(aes(hover_css = "fill:none;")) +#, ) +
  #stat_summary(geom = "line", fun.y = median, size = 1, alpha=0.5, aes(color="blue")) +
  geom_line(aes(x = fecha_simp, y = monto_total, color=as.factor(gastador_nombre)),size=1,alpha=.5) +
                       ggiraph::geom_point_interactive(aes(x = fecha_simp, y = monto_total, color=as.factor(gastador_nombre),tooltip=tooltip),size = 1) +
  #geom_text(aes(x = fech_ing_qrt, y = perc_dup-0.05, label = paste0(n)), vjust = -1,hjust = 0, angle=45, size=3) +
 # guides(color = F)+
  theme_custom() +
  geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
  labs(y="Gastos (en miles)",x="Semanas y Meses", subtitle="Interlineado, incorporación de la Diosi; Azul= Tami; Rojo= Andrés") + ggtitle( "Figura 4. Gastos por Gastador") +
  scale_y_continuous(labels = f <- function(x) paste0(x/1000)) + 
  scale_color_manual(name = "Gastador", values= c("blue", "red"), labels = c("Tami", "Andrés")) +
  tsibble::scale_x_yearweek(date_breaks = "1 month", minor_breaks = "1 week", labels=scales::date_format("%m/%y")) +
  theme(axis.text.x = element_text(vjust = 0.5,angle = 35), legend.position='bottom')+
     theme(
    panel.border = element_blank(), 
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), 
    axis.line = element_line(colour = "black")
    )

#  x <- girafe(ggobj = gg)
#  x <- girafe_options(x = x,
#                      opts_hover(css = "stroke:red;fill:orange") )
#  if( interactive() ) print(x)

#}
tooltip_css <- "background-color:gray;color:white;font-style:italic;padding:10px;border-radius:10px 20px 10px 20px;"

#ggiraph(code = {print(gg)}, tooltip_extra_css = tooltip_css, tooltip_opacity = .75 )

x <- girafe(ggobj = gg)
x <- girafe_options(x,
  opts_zoom(min = 1, max = 3), opts_hover(css =tooltip_css))
x
plot<-Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(month=as.Date(as.character(lubridate::floor_date(fecha, "month"))))%>%
    dplyr::group_by(month)%>%
    dplyr::summarise(gasto_total=sum(monto)/1000) %>%
      ggplot2::ggplot(aes(x = month, y = gasto_total)) +
      geom_point()+
      geom_line(size=1) +
      theme_custom() +
      geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
      geom_vline(xintercept = as.Date("2019-03-23"),linetype = "dashed", color="red") +
      labs(y="Gastos (en miles)",x="Meses/Año", subtitle="Interlineado, incorporación de la Diosi") + 
      ggtitle( "Figura. Suma de Gastos por Mes") +        
      scale_x_date(breaks = "1 month", minor_breaks = "1 month", labels=scales::date_format("%m/%y")) +
      theme(axis.text.x = element_text(vjust = 0.5,angle = 45)) 
plotly::ggplotly(plot)  
plot2<-Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))%>%
    dplyr::group_by(day)%>%
    summarise(gasto_total=sum(monto)/1000) %>%
      ggplot2::ggplot(aes(x = day, y = gasto_total)) +
      geom_line(size=1) +
      theme_custom() +
      geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
      geom_vline(xintercept = as.Date("2020-03-23"),linetype = "dashed", color="red") +
      labs(y="Gastos (en miles)",x="Meses/Año", subtitle="Interlineado, incorporación de la Diosi") + 
      ggtitle( "Figura. Suma de Gastos por Día") +        
      scale_x_date(breaks = "1 month", minor_breaks = "1 week", labels=scales::date_format("%m/%y")) +
      theme(axis.text.x = element_text(vjust = 0.5,angle = 45)) 
plotly::ggplotly(plot2)  
tsData <- Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))%>%
    dplyr::group_by(day)%>%
    summarise(gasto_total=sum(monto))%>%
    dplyr::mutate(covid=case_when(day>as.Date("2019-06-02")~1,TRUE~0))%>%
    dplyr::mutate(covid=case_when(day>as.Date("2020-03-10")~covid+1,TRUE~covid))%>%
    dplyr::mutate(covid=as.factor(covid))%>%
  data.frame()
tsData_gastos <-ts(tsData$gasto_total, frequency=7)
mstsData_gastos <- forecast::msts(Gastos_casa$monto, seasonal.periods=c(7,30))

tsData_gastos = decompose(tsData_gastos)

tsdata_gastos_trend<-cbind(tsData,trend=as.vector(tsData_gastos$trend))%>% na.omit()

# Assuming your time series starts on "2019-03-03"
start_date <- as.Date("2019-03-03")
frequency <- 7  # Weekly data
num_periods <- length(tsData_gastos$x)  # Total number of periods in your time series

# Generate sequence of dates
dates <- tsData$day# seq.Date(from = start_date, by = "day", length.out = num_periods)

# Create a data frame from the decomposed time series object
tsData_gastos_df <- data.frame(
  day = dates,
  Actual = as.numeric(tsData_gastos$x),
  Seasonal = as.numeric(tsData_gastos$seasonal),
  Trend = as.numeric(tsData_gastos$trend),
  Random = as.numeric(tsData_gastos$random)
)

tsData_gastos_long <- tsData_gastos_df %>%
  pivot_longer(cols = c("Actual", "Seasonal", "Trend", "Random"), 
               names_to = "Component", values_to = "Value")

# Plotting with facet_wrap
ggplot(tsData_gastos_long, aes(x = day, y = Value)) +
  geom_line() +
  theme_bw() + 
  labs(title = "Descomposición de los Gastos Diarios", x = "Date", y = "Value") +
  scale_x_date(date_breaks = "3 months", date_labels = "%m %Y") +
  facet_wrap(~ Component, scales = "free_y", ncol=1) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  theme(strip.text = element_text(size = 12))

Ahora con las tendencias descompuestas

#require(zoo)
Gastos_casa_gastador_cat_gasto<- 
Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha2=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(gastador=ifelse(gastador=="Andrés",1,0)) %>%
    dplyr::mutate(treat=ifelse(fecha2>"2019-W26",1,0)) %>% 
    dplyr::mutate(
      # 1) Normalización básica de texto
      gasto_clean = gasto %>%
        stringr::str_squish() %>%                   # espacios extra
        stringr::str_to_lower() %>%                 # todo en minúsculas
        stringr::str_replace_all(",", "") %>%       # quita comas, si hubiera
        stringr::str_replace_all("\\s+/\\s+", "/") %>%  # "a / b" -> "a/b"
        stringi::stri_trans_general("Latin-ASCII"),  # sin tildes
      
      # 2) Unificar etiquetas que son lo mismo
      gasto_clean = dplyr::case_when(
        gasto_clean %in% c("uber", "uber reñaca", "uber matri cony", "uber cumple papa") ~ "uber",
        gasto_clean %in% c("yaz", "yaz ") ~ "yaz",
        gasto_clean %in% c("limpieza alfombras", "limpieza alfombra") ~ "limpieza alfombras",
        gasto_clean %in% c("vacuna influenza", "vacunas influenza") ~ "vacuna influenza",
        gasto_clean %in% c("aspiradora", "aspiradora ") ~ "aspiradora",
        gasto_clean %in% c("plata reciclaje y basurero",
                           "plata basurero", "plata basureros",
                           "plata fiestas patrias basureros",
                           "aporte basureros", "basureros") ~ "basureros",
        gasto_clean %in% c("donaciones", "donacion") ~ "donacion",
        gasto_clean %in% c("prestamo", "prestamo andres", "prestamo andres", "prestamo andres ") ~ "prestamo andres",
        TRUE ~ gasto_clean
      ),
      # 3) (Opcional) Variable más agregada de tipo de gasto
      gasto_cat = dplyr::case_when(
        gasto_clean == "comida"                       ~ "Comida",
        gasto_clean %in% c("enceres")                 ~ "Enceres",
        gasto_clean == "diosi"                        ~ "Diosi",
        gasto_clean %in% c("vtr", "netflix",
                           "crunchyroll", "entel")    ~ "Streaming/Telefonia",
        gasto_clean %in% c("electricidad", "gas",
                           "parafina", "kerosen")     ~ "Servicios básicos",
        gasto_clean %in% c("agua")                    ~ "Agua",
        gasto_clean %in% c("uber", "transporte",
                           "gas/bencina", "bencina + tag",
                           "bencina reñaca",
                           "bencina + peajes maite",
                           "bencina + tag cumple delox") ~ "Transporte",
        gasto_clean %in% c("farmacia", "remedio",
                           "remedios", "remedios covid",
                           "gaviscón y paracetamol",
                           "nexium")                 ~ "Farmacia/Salud",
        gasto_clean %in% c("electrodomesticos/mantencion casa",
                           "microondas regalo", "aspiradora",
                           "muebles ratan", "mouse",
                           "reloj")                  ~ "Electrodomesticos",
        TRUE ~ stringr::str_to_title(gasto_clean)  # default: limpio pero no agregado
      )
    ) %>%
    dplyr::mutate(
      gasto_cat10 = dplyr::case_when(
        # 1) COMIDA (incluye los “comida” especiales de tu criterio viejo)
        gasto_cat %in% c(
          "Comida",
          "Pan Pepperino",
          "Cookidoo",
          "Granola Wild Foods",
          "Wild Protein",
          "Brussels"
        ) ~ "Comida",
        # 2) ENCERES
        gasto_cat %in% c(
          "Enceres",
          "Incoludido",
          "Tres Toques"
        ) ~ "Enceres",
        # 3) DIOSI
        gasto_cat == "Diosi" ~ "Diosi",
        # 4) SERVICIOS BÁSICOS (agua, luz, gas/bencina, etc.)
        gasto_cat %in% c(
          "Servicios básicos",
          "Agua",
          "Bencina Renaca"
        ) ~ "Servicios básicos",
        # 5) FARMACIA (todo lo que en tu criterio iba a Farmacia)
        gasto_cat %in% c(
          "Farmacia/Salud",
          "Yaz",
          "Gaviscon Y Paracetamol",
          "Vacuna Influenza",
          "Cruz Verde"
        ) ~ "Farmacia",
        # 6) TRANSPORTE (uber, viajes, bencina que antes era Transporte)
        gasto_cat %in% c(
          "Transporte",
          "Uber Renaca",
          "Viaje Brasil"
        ) ~ "Transporte",
        # 7) ELECTRODOMÉSTICOS / MANTENCIÓN CASA
        gasto_cat %in% c(
          "Electrodomesticos",
          "Cortina Bano",
          "Filtro Agua",
          "Filtro Piscina Mspa",
          "Mantencion Toyotomi",
          "Limpieza Alfombras",
          "Sopapo",
          "Pila Estufa",
          "Jardinero",
          "Camaras Seguridad M.barrios",
          "Pago Camaras Mb",
          "Tina",
          "Nacho",
          "Lamina",
          "Chromecast",
          "Easy"
        ) ~ "Electrodomesticos/mantencion casa",
        # 8) STREAMING / TELEFONÍA (Netflix / VTR / Crunchyroll / Entel, que ya aglutinaste en gasto_cat)
        gasto_cat %in% c(
          "Streaming/Telefonia"
        ) ~ "Streaming/Telefonia",
        # 9) DONACIONES / REGALOS / APORTES
        gasto_cat %in% c(
          "Basureros",
          "Donacion",
          "Regalo Chocolates",
          "Regalo Matri Chepa",
          "Regalo Matri Cony",
          "Matri Andres Kogan",
          "Rgalo Chepa",
          "Ropa",
          "Ropa Tami",
          "Assistcard Viaje"
        ) ~ "Donaciones/regalos",
        # 10) TODO LO DEMÁS → OTROS
        TRUE ~ "Otros"
      )
    ) %>%
    dplyr::group_by(gastador, fecha, gasto_cat10, .drop=F) %>%
    dplyr::summarise(monto=sum(monto)) %>% 
    dplyr::mutate(gastador_nombre=plyr::revalue(as.character(gastador), c("0" = "Tami", "1"="Andrés")))




Gastos_casa_gastador_cat_gasto %>%
    dplyr::mutate(fecha = as.Date(fecha)) %>%       # por si viene como POSIXct
    dplyr::group_by(gasto_cat10) %>%                     # opcional: quita ítems con 1 obs
    dplyr::filter(n() >= 6) %>%   
    dplyr::ungroup() %>%
ggplot2::ggplot(aes(x = fecha, y = monto, color=as.factor(gastador_nombre))) +
  #stat_summary(geom = "line", fun.y = median, size = 1, alpha=0.5, aes(color="blue")) +
  geom_line(size=1) +
  facet_grid(gasto_cat10~.)+
  geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
  labs(y="Gastos (en miles)",x="Años", subtitle="Interlineado, incorporación de la Diosi; Azul= Tami; Rojo= Andrés") +
  ggtitle( "Figura 6. Gastos Semanales por Gastador e ítem (media) [n>5]") +
  scale_y_continuous(labels = f <- function(x) paste0(x/1000)) + 
  scale_color_manual(name = "Gastador", values= c("blue", "red"), labels = c("Tami", "Andrés")) +
  tsibble::scale_x_yearweek(breaks = "1 year", minor_breaks = "3 months", labels= scales::date_format("%m/%y")) +
  guides(color = F)+
  theme_custom() +
  theme(axis.text.x = element_text(vjust = 0.5,angle = 35)) +
  theme(
    panel.border = element_blank(), 
    #panel.grid.major = element_blank(),
    #panel.grid.minor = element_blank(), 
    panel.grid.minor = element_line(color = "grey90", size = 0.3),
    panel.grid.major = element_line(color = "grey75", size = 0.5),    
    axis.line = element_line(colour = "black")
    )

# Apply MSTL decomposition

Gastos_casa |> 
    dplyr::mutate(fecha= readr::parse_date(fecha, format="%d/%m/%Y")) |> 
    dplyr::arrange(fecha) |>  pull(monto) -> monto_ts
Gastos_casa |> 
    dplyr::mutate(fecha= readr::parse_date(fecha, format="%d/%m/%Y")) |> 
    dplyr::arrange(fecha) |>  pull(fecha) -> fecha_ts

mstl_data_autplt <- forecast::mstl(monto_ts, lambda = "auto",iterate=5000000,start = 
lubridate::decimal_date(as.Date("2019-03-03")))

# Convert the decomposed time series to a data frame
mstl_df <- data.frame(
  Date = as.Date(fecha_ts, format="%d/%m/%Y"),
  Data = as.numeric(mstl_data_autplt[, "Data"]),
  Trend = as.numeric(mstl_data_autplt[, "Trend"]),
  Remainder = as.numeric(mstl_data_autplt[, "Remainder"])
)

# Reshape the data frame for ggplot2
mstl_long <- mstl_df %>%
  dplyr::arrange(Date) %>%
  tidyr::pivot_longer(cols = -Date, names_to = "Component", values_to = "Value")

mstl_long_filtered <- mstl_long %>% dplyr::filter(!(Date %in% (mstl_long %>% dplyr::distinct(Date) %>% top_n(7, Date) %>% pull(Date))))

# Plotting with ggplot2
ggplot(mstl_long_filtered, aes(x = Date, y = Value)) +
  geom_line() +
  theme_bw() + 
  labs(title = "Descomposición MSTL (- 7 days)", x = "Fecha", y = "Valor") +
  scale_x_date(date_breaks = "3 months", date_labels = "%m-%Y") +
  facet_wrap(~ Component, scales = "free_y", ncol = 1) +
  theme(strip.text = element_text(size = 12),
        axis.text.x = element_text(angle = 90, hjust = 1))

ts_week_covid<-  
Gastos_casa %>%
    dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
    dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
    dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))%>%
    dplyr::group_by(fecha_week)%>%
    dplyr::summarise(gasto_total=sum(monto,na.rm=T)/1000,min_day=min(day))%>%
    dplyr::ungroup() %>% 
    dplyr::mutate(covid=dplyr::case_when(min_day>=as.Date("2020-03-17")~1,TRUE~0))%>%
    dplyr::mutate(covid=as.factor(covid))%>%
    data.frame()


ts_week_covid$gasto_total_na<-ts_week_covid$gasto_total
post_resp<-ts_week_covid$gasto_total[which(ts_week_covid$covid==1)]
ts_week_covid$gasto_total_na[which(ts_week_covid$covid==1)]<-NA
ts_week_covid$gasto_total[which(ts_week_covid$covid==0)]
##  [1]  98.357   4.780  56.784  50.506  64.483  67.248  49.299  35.786  58.503
## [10]  64.083  20.148  73.476 127.004  81.551  69.599 134.446  58.936  26.145
## [19] 129.927 104.989 130.860  81.893  95.697  64.579 303.471 151.106  49.275
## [28]  76.293  33.940  83.071 119.512  20.942  58.055  71.728  44.090  33.740
## [37]  59.264  77.410  60.831  63.376  48.754 235.284  29.604 115.143  72.419
## [46]   5.980  80.063 149.178  69.918 107.601  72.724  63.203  99.681 130.309
## [55] 195.898 112.066
# 1) Create corpus
corpus <- tm::Corpus(tm::VectorSource(Gastos_casa$obs))

# 2) Preprocess text
d <- corpus |>
  tm::tm_map(tm::content_transformer(tolower)) |>
  tm::tm_map(tm::stripWhitespace) |>
  tm::tm_map(tm::removePunctuation) |>
  tm::tm_map(tm::removeNumbers) |>
  tm::tm_map(tm::removeWords, tm::stopwords("spanish")) |>
  tm::tm_map(tm::removeWords, "menos")

# 3) Term-document matrix
tdm <- tm::TermDocumentMatrix(d)

# 4) Convert to matrix
m <- base::as.matrix(tdm)

# 5) Frequencies
v  <- base::sort(base::rowSums(m), decreasing = TRUE)

df <- base::data.frame(
  word = base::names(v),
  freq = v,
  row.names = NULL
)

# 6) Wordcloud
wordcloud::wordcloud(
  words       = df$word,
  freq        = df$freq,
  max.words   = 100,
  random.order= FALSE,
  rot.per     = 0.35,
  colors      = RColorBrewer::brewer.pal(8, "Dark2"),
  scale       = c(4, 0.5)
)

fit_month_gasto <- Gastos_casa_gastador_cat_gasto %>%
  dplyr::ungroup() %>%
  dplyr::mutate(
    # month string YYYY-MM
    fecha_month = base::format(fecha, "%Y-%m")
  ) %>%
  dplyr::mutate(
    # order months from 2019-03 until today (like in your original code)
    fecha_month = base::factor(
      fecha_month,
      levels = base::format(
        base::seq(
          from = base::as.Date("2019-03-03"),
          to   = base::as.Date(base::substr(base::Sys.time(), 1, 10)),
          by   = "1 month"
        ),
        "%Y-%m"
      )
    )
  ) %>%
  dplyr::group_by(fecha_month, gasto_cat10, .drop = FALSE) %>%
  dplyr::summarise(
    gasto_total = base::sum(monto, na.rm = TRUE) / 1000,
    .groups = "drop"
  ) %>%
  base::as.data.frame()

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
current_ym <- base::format(base::Sys.Date(), "%Y-%m")

tabla_gasto_year_item <-
    fit_month_gasto %>%
    dplyr::mutate(
        year = base::substr(base::as.character(fecha_month), 1, 4),
        ym   = base::as.character(fecha_month)
    ) %>%
    # opcional: excluir el mes actual (incompleto)
    dplyr::filter(ym != current_ym) %>%
    # si quieres solo ciertos años:
    # dplyr::filter(year %in% c("2020","2021","2022","2023","2024","2025")) %>%
    dplyr::group_by(gasto_cat10, year) %>%
    dplyr::summarise(
        gasto_prom = base::mean(gasto_total, na.rm = TRUE),
        .groups    = "drop"
    ) %>%
    tidyr::pivot_wider(
        names_from  = year,
        values_from = gasto_prom
    ) %>%
    janitor::adorn_totals()  # agrega fila "Total"

tabla_gasto_year_item |> 
  knitr::kable("markdown",
    caption = "Gasto Promedio Mensual por Ítem y Año (suma por categoría y mes)",
    digits  = 1)
Gasto Promedio Mensual por Ítem y Año (suma por categoría y mes)
gasto_cat10 2019 2020 2021 2022 2023 2024 2025
Comida 208.4 392.9 317.9 312.4 366.0 326.9 317.5
Diosi 84.2 74.2 79.0 53.5 74.4 50.0 36.5
Donaciones/regalos NA 34.2 86.0 49.3 45.0 34.0 NA
Electrodomesticos/mantencion casa NA 57.8 196.4 27.6 43.9 63.4 42.7
Enceres 62.7 78.0 35.5 36.3 36.5 48.0 14.6
Farmacia NA 33.7 33.6 17.0 32.1 NA NA
Otros NA NA 90.0 60.0 5.2 229.5 18.3
Servicios básicos 73.2 57.4 73.4 105.6 85.7 129.3 101.3
Streaming/Telefonia 22.1 29.8 26.1 32.3 25.9 26.3 22.0
Transporte NA NA NA 26.4 32.9 242.2 NA
Total 450.6 758.1 938.0 720.4 747.7 1149.5 552.8


2. UF Proyectada

Saqué la UF proyectada

#options(max.print=5000)

uf18 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2018.htm")%>% rvest::html_nodes("table")
uf19 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2019.htm")%>% rvest::html_nodes("table")
uf20 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2020.htm")%>% rvest::html_nodes("table")
uf21 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2021.htm")%>% rvest::html_nodes("table")
uf22 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2022.htm")%>% rvest::html_nodes("table")
uf23 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2023.htm")%>% rvest::html_nodes("table")
uf24 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2024.htm")%>% rvest::html_nodes("table")

tryCatch(uf25 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2025.htm")%>% rvest::html_nodes("table"),
    error = function(c) {
      uf24b <<- cbind.data.frame(Día=NA, variable=NA, value=NA)
      
    }
  )

tryCatch(uf25 <-uf25[[length(uf25)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1),
    error = function(c) {
      uf25 <<- cbind.data.frame(Día=NA, variable=NA, value=NA)
    }
)

uf_serie<-
bind_rows(
cbind.data.frame(anio= 2018, uf18[[length(uf18)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2019, uf19[[length(uf19)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2020, uf20[[length(uf20)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2021, uf21[[length(uf21)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2022, uf22[[length(uf22)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2023, uf23[[length(uf23)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2024, uf23[[length(uf24)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2025, uf25)
)

uf_serie_corrected<-
uf_serie %>% 
dplyr::mutate(month=plyr::revalue(tolower(.[[3]]),c("ene" = 1, "feb"=2, "mar"=3, "abr"=4, "may"=5, "jun"=6, "jul"=7, "ago"=8, "sep"=9, "oct"=10, "nov"=11, "dic"=12))) %>% 
  dplyr::mutate(value=stringr::str_trim(value), value= sub("\\.","",value),value= as.numeric(sub("\\,",".",value))) %>% 
  dplyr::mutate(date=paste0(sprintf("%02d", .[[2]])," ",sprintf("%02d",as.numeric(month)),", ",.[[1]]), date3=lubridate::parse_date_time(date,c("%d %m, %Y"),exact=T),date2=date3) %>% 
   na.omit()#%>%  dplyr::filter(is.na(date3))
## Warning: There was 1 warning in `dplyr::mutate()`.
## i In argument: `date3 = lubridate::parse_date_time(date, c("%d %m, %Y"), exact
##   = T)`.
## Caused by warning:
## !  54 failed to parse.
#Day of the month as decimal number (1–31), with a leading space for a single-digit number.
#Abbreviated month name in the current locale on this platform. (Also matches full name on input: in some locales there are no abbreviations of names.)

warning(paste0("number of observations:",nrow(uf_serie_corrected),",  min uf: ",min(uf_serie_corrected$value),",  min date: ",min(uf_serie_corrected $date3 )))
## Warning: number of observations:2899, min uf: 26799.01, min date: 2018-01-01
# 
# uf_proyectado <- readxl::read_excel("uf_proyectado.xlsx") %>% dplyr::arrange(Período) %>% 
#   dplyr::mutate(Período= as.Date(lubridate::parse_date_time(Período, c("%Y-%m-%d"),exact=T)))
ts_uf_proy<-
ts(data = uf_serie_corrected$value, 
   start = as.numeric(as.Date("2018-01-01")), 
   end = as.numeric(as.Date(uf_serie_corrected$date3[length(uf_serie_corrected$date3)])), frequency = 1,
   deltat = 1, ts.eps = getOption("ts.eps"))

fit_tbats <- forecast::tbats(ts_uf_proy)
## Warning in bats(as.numeric(y), use.box.cox = use.box.cox, use.trend =
## use.trend, : optim() did not converge.
fr_fit_tbats<-forecast::forecast(fit_tbats, h=298)
# Configurar API Key
if(nchar(Sys.getenv("API_NIXTLA"))<2){
  Sys.setenv("API_NIXTLA"=as.character(read.table(paste0(dirname(getwd()),"/nixtlar_api_key.txt"), quote="\"", comment.char="")))
}
if(nchar(Sys.getenv("API_NIXTLA"))<2){
  try(Sys.setenv("API_NIXTLA"=as.character(read.table(paste0(gsub("/gastos","",dirname(rstudioapi::getActiveDocumentContext()$path)),"/nixtlar_api_key.txt"), quote="\"", comment.char=""))))
}

try(nixtlar::nixtla_set_api_key(Sys.getenv("NIXTLA")))
## API key has been set for the current session.
if(nchar(Sys.getenv("NIXTLA"))<2){
  nixtlar::nixtla_set_api_key(Sys.getenv("API_NIXTLA"))
}
## API key has been set for the current session.
# Preparar datos en formato requerido por TimeGPT
uf_timegpt <- uf_serie_corrected %>%
    dplyr::rename(ds = date3, y = value) %>%
    dplyr::mutate(ds = format(ds, "%Y-%m-%d")) %>%
    dplyr::mutate(unique_id = "serie_1")%>%
    dplyr::select(unique_id, ds, y)

# Realizar pronóstico con TimeGPT
timegpt_fcst <- nixtlar::nixtla_client_forecast(
  uf_timegpt,
  h = 298,               # 298 días a pronosticar
  freq = "D",            # Frecuencia diaria
  add_history = FALSE,     # Incluir datos históricos en el output
  level = c(80,95),
  model=  "timegpt-1-long-horizon", 
  clean_ex_first = TRUE
)
## The specified horizon h exceeds the model horizon. This may lead to less accurate forecasts. Please consider using a smaller horizon.
# The Conflict: The API endpoint for the long-horizon model likely does not support generating "fitted values" for the historical input data, causing the server to return "Unprocessable Entity" (422).

# 1. Convertir 'ds' a fecha en ambas tablas
uf_timegpt <- uf_timegpt %>% 
    mutate(ds = as.Date(ds))

timegpt_fcst <- timegpt_fcst %>% 
    mutate(ds = as.Date(ds))

# 2. Combinar los datos históricos y el pronóstico
full_data <- bind_rows(
    uf_timegpt %>% mutate(type = "Histórico"),
    timegpt_fcst %>% mutate(type = "Pronóstico")
)


# Ensure dates are Date objects
uf_timegpt <- uf_timegpt %>% mutate(ds = as.Date(ds))
timegpt_fcst <- timegpt_fcst %>% mutate(ds = as.Date(ds))

ggplot() +
    # --- FORECAST LAYERS (Map y to 'TimeGPT') ---
    # 95% Confidence Interval
    geom_ribbon(data = timegpt_fcst, 
                aes(x = ds, ymin = `TimeGPT-lo-95`, ymax = `TimeGPT-hi-95`), 
                fill = "#4B9CD3", alpha = 0.2) +
    # 80% Confidence Interval
    geom_ribbon(data = timegpt_fcst, 
                aes(x = ds, ymin = `TimeGPT-lo-80`, ymax = `TimeGPT-hi-80`), 
                fill = "#4B9CD3", alpha = 0.3) +
    # Forecast Line
    geom_line(data = timegpt_fcst, 
              aes(x = ds, y = TimeGPT, color = "Pronóstico"), size = 1) +

    # --- HISTORICAL LAYER (Map y to 'y') ---
    geom_line(data = uf_timegpt, 
              aes(x = ds, y = y, color = "Histórico"), size = 1) +

    # --- STYLING ---
    geom_vline(xintercept = max(uf_timegpt$ds), 
               linetype = "dashed", color = "red", size = 0.8) +
    scale_x_date(date_breaks = "3 months", date_labels = "%b %Y") +
    scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
    scale_color_manual(name = "Leyenda", 
                       values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")) +
    # Configuración del eje x
    scale_x_date(
        date_breaks = "3 months",  # Reduce la frecuencia de las etiquetas
        date_labels = "%b %Y",  # Formato de etiquetas (mes y año)
    ) +
    # Configuración del eje y
    scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
    # Configuración de colores
    scale_color_manual(
        name = "Leyenda",
        values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")
    ) +
    # Títulos y subtítulos
    labs(
        title = "Pronóstico de UF, Serie Temporal con TimeGPT",
        subtitle = "Intervalos de confianza al 80% (más oscuro) y 95% (más claro)",
        x = "Fecha",
        y = "Valor",
        color = "Leyenda"
    ) +
    # Tema y estilos
    theme_minimal() +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        legend.position = "bottom",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()
    )
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

library(prophet)
## Warning: package 'prophet' was built under R version 4.4.3
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.4.3
## Loading required package: rlang
## Warning: package 'rlang' was built under R version 4.4.3
## 
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
## 
##     flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
## The following object is masked from 'package:sparklyr':
## 
##     invoke
## The following object is masked from 'package:data.table':
## 
##     :=
  model <- prophet(
  cbind.data.frame(ds= as.Date(uf_timegpt$ds), y=uf_timegpt$y),
  # Trend flexibility
  growth = "linear",
  changepoint.prior.scale = 0.05,  # Reduced for smoother trend
  n.changepoints = 50,  # Increased from default 25
  
  # Seasonality
  yearly.seasonality = TRUE,
  weekly.seasonality = TRUE,
  daily.seasonality = FALSE,  # Disabled for daily data
  seasonality.mode = "additive",
  seasonality.prior.scale = 15,  # Increased to capture stronger seasonality
  
  # Holidays (if applicable)
  # holidays = generated_holidays  # Create with add_country_holidays()
  
  # Uncertainty intervals
  interval.width = 0.95,
  uncertainty.samples = 1000
)
future <- make_future_dataframe(model, periods = 298, include_history = T)
forecast <- predict(model, future)
forecast <- forecast[, c("ds", "yhat", "yhat_lower", "yhat_upper")]
forecast$pred <- ifelse(forecast$ds > max(uf_timegpt$ds), 1,0)
## Warning in check_tzones(e1, e2): 'tzone' attributes are inconsistent
forecast$ds <- as.Date(forecast$ds)

ggplot(forecast, aes(x = ds, y = yhat)) +
  geom_ribbon(aes(ymin = yhat_lower, ymax = yhat_upper), 
              fill = "#9ecae1", alpha = 0.4) +
  geom_line(color = "#08519c", linewidth = 0.8) +
  geom_vline(xintercept = max(uf_timegpt$ds), color = "red", linetype = "dashed", linewidth=1) +
  scale_x_date(date_breaks = "6 months", date_labels = "%y %b") +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Valores predichos (95%IC)",
      # subtitle = "March 10, 2025 - May 7, 2025",
       x = "Fecha",
       y = "Valor",
      # caption = "Source: Prophet Forecast Model"
      ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray50"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    plot.caption = element_text(color = "gray30")
  )

# 1. Ensure dates are strictly Date objects
history_df <- uf_timegpt %>% mutate(ds = as.Date(ds))
fcst_df    <- timegpt_fcst %>% mutate(ds = as.Date(ds))

# 2. Plotting (Notice we don't bind_rows, we layer them)
ggplot() +
    # --- FORECAST LAYERS ---
    # 95% Confidence Interval
    geom_ribbon(data = fcst_df, 
                aes(x = ds, ymin = `TimeGPT-lo-95`, ymax = `TimeGPT-hi-95`), 
                fill = "#4B9CD3", alpha = 0.2) +
    # 80% Confidence Interval
    geom_ribbon(data = fcst_df, 
                aes(x = ds, ymin = `TimeGPT-lo-80`, ymax = `TimeGPT-hi-80`), 
                fill = "#4B9CD3", alpha = 0.3) +
    # Forecast Line (Map y to the 'TimeGPT' column)
    geom_line(data = fcst_df, 
              aes(x = ds, y = TimeGPT, color = "Pronóstico"), size = 1) +

    # --- HISTORY LAYER ---
    # History Line (Map y to the 'y' column)
    geom_line(data = history_df, 
              aes(x = ds, y = y, color = "Histórico"), size = 1) +

    # --- STYLING ---
    geom_vline(xintercept = max(history_df$ds), 
               linetype = "dashed", color = "red", size = 0.8) +
    scale_x_date(date_breaks = "3 months", date_labels = "%b %Y") +
    scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
    scale_color_manual(name = "Leyenda", 
                       values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")) +
    labs(
        title = "Pronóstico de Serie Temporal con TimeGPT",
        subtitle = "Modelo: timegpt-1-long-horizon",
        x = "Fecha", y = "Valor"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold", size = 14),
      plot.subtitle = element_text(color = "gray50"),
      axis.text.x = element_text(angle = 45, hjust = 1),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      plot.caption = element_text(color = "gray30")
    )+
      theme(legend.position = "bottom")

La proyección de la UF a 298 días más 2025-12-09 00:04:58 sería de: 26.541 pesos// Percentil 95% más alto proyectado: 35.006,88

Según TimeGPT: La proyección de la UF a 298 días más 2026-10-03 sería de: 40.075,16 pesos// Percentil 80% más alto proyectado: 41.971,24 pesos// Percentil 95% más alto proyectado: 42.499,27

Según prophet: La proyección de la UF a 298 días más 2026-10-03 sería de: 42.567 pesos// Percentil 95% más alto proyectado: 51.083

Ahora con un modelo ARIMA automático


arima_optimal_uf = forecast::auto.arima(ts_uf_proy)

  autoplotly::autoplotly(forecast::forecast(arima_optimal_uf, h=298), ts.colour = "darkred",
           predict.colour = "blue", predict.linetype = "dashed")%>% 
  plotly::layout(showlegend = F, 
          yaxis = list(title = "Gastos"),
         xaxis = list(
    title="Fecha",
      ticktext = as.list(seq(from = as.Date("2018-01-01"), 
                                  to = as.Date("2018-01-01")+length(fit_tbats$fitted.values)+298, by = 90)), 
      tickvals = as.list(seq(from = as.numeric(as.Date("2018-01-01")), 
                             to = as.numeric(as.Date("2018-01-01"))+length(fit_tbats$fitted.values)+298, by = 90)),
      tickmode = "array",
    tickangle = 90
    ))
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## i Please use tidy evaluation idioms with `aes()`.
## i See also `vignette("ggplot2-in-packages")` for more information.
## i The deprecated feature was likely used in the ggfortify package.
##   Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fr_fit_tbats_uf<-forecast::forecast(arima_optimal_uf, h=298)
dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats)),variable) %>% dplyr::summarise(max=max(value)) %>% 
dplyr::right_join(dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_uf)),variable) %>% dplyr::summarise(max=max(value)),by="variable") %>% 
  dplyr::mutate(variable=factor(variable,levels=c("Lo.95","Lo.80","Point.Forecast","Hi.80","Hi.95"))) %>% 
  dplyr::arrange(variable) %>% 
  knitr::kable(format="markdown", caption="Tabla. Estimación UF (de aquí a 298 días) según cálculos de gastos mensuales",
               col.names= c("Item","UF Proyectada (TBATS)","UF Proyectada (ARIMA)"))
## No id variables; using all as measure variables
## No id variables; using all as measure variables
Tabla. Estimación UF (de aquí a 298 días) según cálculos de gastos mensuales
Item UF Proyectada (TBATS) UF Proyectada (ARIMA)
Lo.95 26160.90 26323.63
Lo.80 26292.00 26488.18
Point.Forecast 26541.46 26799.01
Hi.80 31366.26 32164.85
Hi.95 34265.88 35005.35


3. Gastos proyectados

Lo haré en base a 2 cálculos: el gasto semanal y el gasto mensual en base a mis gastos desde marzo de 2019. La primera proyección la hice añadiendo el precio del arriendo mensual y partiendo en 2 (porque es con yo y Tami). No se incluye el último mes.

Gastos_casa_nvo <- readr::read_csv(as.character(path_sec),
                               col_names = c("Tiempo", "gasto", "fecha", "obs", "monto", "gastador",
                                             "link"),skip=1) %>% 
              dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>% 
              dplyr::mutate(fecha_month=strftime(fecha, format = "%Y-%m")) %>%
              dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))

Gastos_casa_m <-
Gastos_casa_nvo %>% dplyr::group_by(fecha_month)%>%
              dplyr::summarise(gasto_total=(sum(monto)+500000)/1000,fecha=first(fecha))%>%
              data.frame()
  

uf_serie_corrected_m <-
uf_serie_corrected %>% dplyr::mutate(ano_m=paste0(anio,"-",sprintf("%02d",as.numeric(month)))) %>%  dplyr::group_by(ano_m)%>%
              dplyr::summarise(uf=(mean(value))/1000,fecha=first(date3))%>%
              data.frame() %>% 
  dplyr::filter(fecha>="2019-02-28")
#Error: Error in standardise_path(file) : object 'enlace_gastos' not found

ts_uf_serie_corrected_m<-
ts(data = uf_serie_corrected_m$uf[-length(uf_serie_corrected_m$uf)], 
   start = 1, 
   end = nrow(uf_serie_corrected_m), 
   frequency = 1,
   deltat = 1, ts.eps = getOption("ts.eps"))

ts_gastos_casa_m<-
ts(data = Gastos_casa_m$gasto_total[-length(Gastos_casa_m$gasto_total)], 
   start = 1, 
   end = nrow(Gastos_casa_m), 
   frequency = 1,
   deltat = 1, ts.eps = getOption("ts.eps"))

fit_tbats_m <- forecast::tbats(ts_gastos_casa_m)

seq_dates<-format(seq(as.Date("2019/03/01"), by = "month", length = dim(Gastos_casa_m)[1]+12), "%m\n'%y")

autplo2t<-
  autoplotly::autoplotly(forecast::forecast(fit_tbats_m, h=12), ts.colour = "darkred",
           predict.colour = "blue", predict.linetype = "dashed")%>% 
  plotly::layout(showlegend = F, 
          yaxis = list(title = "Gastos (en miles)+ Arriendo"),
         xaxis = list(
    title="Fecha",
      ticktext = as.list(seq_dates[seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)]), 
      tickvals = as.list(seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)),
      tickmode = "array"#"array"
    )) 

autplo2t

Ahora asumiendo un modelo ARIMA, e incluimos como regresor al precio de la UF.

paste0("Optimo pero sin regresor")
## [1] "Optimo pero sin regresor"
arima_optimal = forecast::auto.arima(ts_gastos_casa_m)
arima_optimal
## Series: ts_gastos_casa_m 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1      mean
##       0.4448  1031.957
## s.e.  0.1012    38.334
## 
## sigma^2 = 38228:  log likelihood = -541.36
## AIC=1088.72   AICc=1089.03   BIC=1095.9
paste0("Optimo pero con regresor")
## [1] "Optimo pero con regresor"
arima_optimal2 = forecast::auto.arima(ts_gastos_casa_m, xreg=as.numeric(ts_uf_serie_corrected_m[1:(length(Gastos_casa_m$gasto_total))]))
arima_optimal2
## Series: ts_gastos_casa_m 
## Regression with ARIMA(1,0,0) errors 
## 
## Coefficients:
##          ar1  intercept    xreg
##       0.4394   845.7396  5.6495
## s.e.  0.1017   313.5410  9.4353
## 
## sigma^2 = 38552:  log likelihood = -541.18
## AIC=1090.37   AICc=1090.89   BIC=1099.94
forecast_uf<-
cbind.data.frame(fecha=as.Date(seq(as.numeric(as.Date(uf_serie_corrected$date3[length(uf_serie_corrected$date3)])),(as.numeric(as.Date(uf_serie_corrected$date3[length(uf_serie_corrected$date3)]))+299),by=1), origin = "1970-01-01"),forecast::forecast(fit_tbats, h=300)) %>% 
  dplyr::mutate(ano_m=stringr::str_extract(fecha,".{7}")) %>% 
  dplyr::group_by(ano_m)%>%
              dplyr::summarise(uf=(mean(`Hi 95`,na.rm=T))/1000,fecha=first(fecha))%>%
            data.frame()
autplo2t2<-
  autoplotly::autoplotly(forecast::forecast(arima_optimal2,xreg=c(forecast_uf$uf[1],forecast_uf$uf), h=12), ts.colour = "darkred",
           predict.colour = "blue", predict.linetype = "dashed")%>% 
  plotly::layout(showlegend = F, 
          yaxis = list(title = "Gastos (en miles)"),
         xaxis = list(
    title="Fecha",
      ticktext = as.list(seq_dates[seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)]), 
      tickvals = as.list(seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)),
      tickmode = "array"#"array"
    )) 

autplo2t2
fr_fit_tbats_m<-forecast::forecast(fit_tbats_m, h=12)
fr_fit_tbats_m2<-forecast::forecast(arima_optimal, h=12)
fr_fit_tbats_m3<-forecast::forecast(arima_optimal2, h=12,xreg=c(forecast_uf$uf[1],forecast_uf$uf))

dplyr::right_join(dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_m3)),variable) %>% dplyr::summarise(max=max(value)), dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_m2)),variable) %>% dplyr::summarise(max=max(value)),by="variable") %>% 
dplyr::right_join(dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_m)),variable) %>% dplyr::summarise(max=max(value)),by="variable") %>% 
  dplyr::mutate(variable=factor(variable,levels=c("Lo.95","Lo.80","Point.Forecast","Hi.80","Hi.95"))) %>% 
  dplyr::arrange(variable) %>% 
  knitr::kable(format="markdown", caption="Estimación en miles de la plata a gastar en el futuro (de aquí a 12 meses) según cálculos de gastos mensuales",
               col.names= c("Item","Modelo ARIMA con regresor (UF)","Modelo ARIMA sin regresor","Modelo TBATS")) 
## No id variables; using all as measure variables
## No id variables; using all as measure variables
## No id variables; using all as measure variables
Estimación en miles de la plata a gastar en el futuro (de aquí a 12 meses) según cálculos de gastos mensuales
Item Modelo ARIMA con regresor (UF) Modelo ARIMA sin regresor Modelo TBATS
Lo.95 610.9522 604.0737 614.9288
Lo.80 759.2349 752.1732 708.2388
Point.Forecast 1039.3477 1031.9398 924.8718
Hi.80 1319.4605 1311.7064 1244.2713
Hi.95 1467.7432 1459.8058 1455.8453


4. Gastos mensuales (resumen manual)

path_sec2<- paste0("https://docs.google.com/spreadsheets/d/",Sys.getenv("SUPERSECRET"),"/export?format=csv&id=",Sys.getenv("SUPERSECRET"),"&gid=847461368")

Gastos_casa_mensual_2022 <- readr::read_csv(as.character(path_sec2),
                #col_names = c("Tiempo", "gasto", "fecha", "obs", "monto", "gastador","link"),
                skip=0)
## Rows: 83 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): mes_ano
## dbl (3): n, Tami, Andrés
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Gastos_casa_mensual_2022,5) %>% 
  knitr::kable("markdown",caption="Resumen mensual, primeras 5 observaciones")
Resumen mensual, primeras 5 observaciones
n mes_ano Tami Andrés
1 marzo_2019 175533 68268
2 abril_2019 152640 55031
3 mayo_2019 152985 192219
4 junio_2019 291067 84961
5 julio_2019 241389 205893


(
Gastos_casa_mensual_2022 %>% 
    reshape2::melt(id.var=c("n","mes_ano")) %>%
  dplyr::mutate(gastador=as.factor(variable)) %>% 
  dplyr::select(-variable) %>% 
 ggplot2::ggplot(aes(x = n, y = value, color=gastador)) +
  scale_color_manual(name="Gastador", values=c("red", "blue"))+
  geom_line(size=1) +
  #geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
  labs(y="Gastos (en miles)",x="Meses", subtitle="Azul= Tami; Rojo= Andrés") +
  ggtitle( "Gastos Mensuales (total manual)") +
  scale_y_continuous(labels = f <- function(x) paste0(x/1000)) + 
#  scale_color_manual(name = "Gastador", values= c("blue", "red"), labels = c("Tami", "Andrés")) +
#  scale_x_yearweek(breaks = "1 month", minor_breaks = "1 week", labels=date_format("%m/%y")) +
 # guides(color = F)+
  theme_custom() +
  theme(axis.text.x = element_text(vjust = 0.5,angle = 35)) +
  theme(
    panel.border = element_blank(), 
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), 
    axis.line = element_line(colour = "black")
    )
) %>% ggplotly()
Gastos_casa_mensual_2022$mes_ano <- gsub("marzo", "Mar", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("abril", "Apr", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("mayo", "May", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("junio", "Jun", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("julio", "Jul", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("agosto", "Aug", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("septiembre", "Sep", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("octubre", "Oct", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("noviembre", "Nov", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("diciembre", "Dec", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("enero", "Jan", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("febrero", "Feb", Gastos_casa_mensual_2022$mes_ano)

Gastos_casa_mensual_2022<- dplyr::filter(Gastos_casa_mensual_2022, !is.na(Tami))

Gastos_casa_mensual_2022$mes_ano <- parse_date_time(Gastos_casa_mensual_2022$mes_ano, "%b_%Y")

Gastos_casa_mensual_2022$mes_ano <- as.Date(as.character(Gastos_casa_mensual_2022$mes_ano))

Gastos_casa_mensual_2022_timegpt <- Gastos_casa_mensual_2022 %>%
  mutate(value = Tami + Andrés) %>%
  rename(ds = mes_ano, y = value) %>%
  mutate(#ds= format(ds, "%Y-%m"),
         unique_id = "1") %>% #it is only one series
  select(unique_id, ds, y)

#Convertir la base de UF a mensual
uf_timegpt_my <- uf_serie_corrected %>%
  dplyr::rename(ds = date3, y = value) %>%
  dplyr::mutate(ds = format(ds, "%Y-%m-%d")) %>%
  dplyr::mutate(unique_id = "serie_1")%>%
  dplyr::select(unique_id, ds, y) %>%
  mutate(ds = ymd(ds)) %>%  # Convert 'ds' to Date
  mutate(month = month(ds), year = year(ds)) %>%  # Extract month and year
  group_by(month, year) %>%  # Group by month and year
  summarise(average_y = mean(y))%>%  # Calculate average y
  mutate(ds = as.Date(paste0(year,"-",month, "-01")))%>%
  ungroup()%>%
  select(ds, uf=average_y)

Gastos_casa_mensual_2022_timegpt_ex<-
Gastos_casa_mensual_2022_timegpt |> 
  dplyr::left_join(uf_timegpt_my, by=c("ds"="ds")) 

#Historical Exogenous Variables: These should be included in the input data immediately following the id_col, ds, and y columns
gastos_timegpt_fcst <- nixtlar::nixtla_client_forecast(
  Gastos_casa_mensual_2022_timegpt_ex,
  h = 12,
  freq = "M",  # Monthly frequency
  add_history = F,
  level = c(80, 95),
  model = "timegpt-1",#"timegpt-1-long-horizon",
  clean_ex_first = TRUE
)

# Convert 'ds' to Date format in both tables
Gastos_casa_mensual_2022_timegpt_corr <- Gastos_casa_mensual_2022_timegpt %>%
  mutate(ds = as.Date(paste0(ds, "-01")))  # Add day to make it a complete date

gastos_timegpt_fcst <- gastos_timegpt_fcst %>%
  mutate(ds = as.Date(paste0(ds, "-01")))  # Add day to make it a complete date

# Combine historical and forecast data
full_data_gastos <- bind_rows(
  Gastos_casa_mensual_2022_timegpt_corr %>% mutate(type = "Histórico"),
  gastos_timegpt_fcst %>% mutate(type = "Pronóstico")
)

full_data_gastos |> 
  dplyr::mutate(y= ifelse(is.na(y),TimeGPT/1000, y/1000)) |> 
# Visualize results
ggplot(aes(x = ds, y = y)) +
  geom_ribbon(aes(ymin = `TimeGPT-lo-95`/1000, ymax = `TimeGPT-hi-95`/1000),
              fill = "#4B9CD3", alpha = 0.2) +
  geom_ribbon(aes(ymin = `TimeGPT-lo-80`/1000, ymax = `TimeGPT-hi-80`/1000),
              fill = "#4B9CD3", alpha = 0.3) +
  geom_line(aes(color = type), linewidth = 1.5) +
  geom_vline(xintercept = max(filter(full_data_gastos, type == "Histórico")$ds),
             linetype = "dashed", color = "red", linewidth = 0.8) +
  scale_x_date(
    date_breaks = "3 months",
    date_labels = "%b %Y"
  ) +
  scale_y_continuous(
    name = "Gastos Totales",
    labels = scales::comma#,
    # breaks = pretty(full_data_gastos$y, n = 10),
    # expand = expansion(mult = c(0.05, 0.05))
  ) +
  scale_color_manual(
    name = "Leyenda",
    values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")
  ) +
  labs(
    title = "Pronóstico de Gastos Mensuales (TimeGPT, ajustando por UF promedio mensual)",
    subtitle = "Intervalos de confianza al 80% (más oscuro) y 95% (más claro)",
    x = "Fecha",
    y = "Gastos Totales",
    color = "Leyenda"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    legend.position = "bottom",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )


Session Info

Sys.getenv("R_LIBS_USER")
## [1] "D:\\a\\_temp\\Library"
sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows Server 2022 x64 (build 26100)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=Spanish_Chile.1252  LC_CTYPE=Spanish_Chile.1252   
## [3] LC_MONETARY=Spanish_Chile.1252 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Chile.1252    
## system code page: 65001
## 
## time zone: UTC
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] prophet_1.0        rlang_1.1.6        Rcpp_1.1.0         scales_1.4.0      
##  [5] ggiraph_0.9.2      tidytext_0.4.3     DT_0.34.0          janitor_2.2.1     
##  [9] autoplotly_0.1.4   rvest_1.0.5        plotly_4.11.0      xts_0.14.1        
## [13] forecast_8.24.0    wordcloud_2.6      RColorBrewer_1.1-3 SnowballC_0.7.1   
## [17] tm_0.7-16          NLP_0.3-2          tsibble_1.1.6      lubridate_1.9.4   
## [21] forcats_1.0.1      dplyr_1.1.4        purrr_1.2.0        tidyr_1.3.1       
## [25] tibble_3.3.0       tidyverse_2.0.0    gsynth_1.2.1       sjPlot_2.9.0      
## [29] lattice_0.22-6     GGally_2.4.0       ggplot2_4.0.1      gridExtra_2.3     
## [33] plotrix_3.8-13     sparklyr_1.9.3     httr_1.4.7         readxl_1.4.5      
## [37] zoo_1.8-14         stringr_1.6.0      stringi_1.8.7      DataExplorer_0.8.4
## [41] data.table_1.17.8  reshape2_1.4.5     fUnitRoots_4040.81 plyr_1.8.9        
## [45] readr_2.1.6       
## 
## loaded via a namespace (and not attached):
##   [1] bitops_1.0-9            cellranger_1.1.0        lifecycle_1.0.4        
##   [4] httr2_1.2.1             StanHeaders_2.32.10     doParallel_1.0.17      
##   [7] globals_0.18.0          vroom_1.6.6             MASS_7.3-60.2          
##  [10] crosstalk_1.2.2         magrittr_2.0.4          sass_0.4.10            
##  [13] rmarkdown_2.30          jquerylib_0.1.4         yaml_2.3.10            
##  [16] fracdiff_1.5-3          doRNG_1.8.6.2           askpass_1.2.1          
##  [19] pkgbuild_1.4.8          DBI_1.2.3               abind_1.4-8            
##  [22] quadprog_1.5-8          nnet_7.3-19             rappdirs_0.3.3         
##  [25] sandwich_3.1-1          gdtools_0.4.4           inline_0.3.21          
##  [28] data.tree_1.2.0         tokenizers_0.3.0        listenv_0.10.0         
##  [31] anytime_0.3.12          spatial_7.3-17          parallelly_1.45.1      
##  [34] codetools_0.2-20        xml2_1.5.0              tidyselect_1.2.1       
##  [37] farver_2.1.2            urca_1.3-4              matrixStats_1.5.0      
##  [40] stats4_4.4.0            jsonlite_2.0.0          ellipsis_0.3.2         
##  [43] Formula_1.2-5           iterators_1.0.14        systemfonts_1.3.1      
##  [46] foreach_1.5.2           tools_4.4.0             glue_1.8.0             
##  [49] xfun_0.54               TTR_0.24.4              ggfortify_0.4.19       
##  [52] loo_2.8.0               withr_3.0.2             timeSeries_4041.111    
##  [55] fastmap_1.2.0           openssl_2.3.4           caTools_1.18.3         
##  [58] digest_0.6.39           timechange_0.3.0        R6_2.6.1               
##  [61] lfe_3.1.1               colorspace_2.1-2        networkD3_0.4.1        
##  [64] gtools_3.9.5            generics_0.1.4          fontLiberation_0.1.0   
##  [67] htmlwidgets_1.6.4       ggstats_0.11.0          pkgconfig_2.0.3        
##  [70] gtable_0.3.6            timeDate_4051.111       lmtest_0.9-40          
##  [73] S7_0.2.1                selectr_0.5-0           janeaustenr_1.0.0      
##  [76] htmltools_0.5.8.1       fontBitstreamVera_0.1.1 tseries_0.10-58        
##  [79] snakecase_0.11.1        knitr_1.50              rstudioapi_0.17.1      
##  [82] tzdb_0.5.0              nlme_3.1-164            curl_7.0.0             
##  [85] cachem_1.1.0            KernSmooth_2.23-22      parallel_4.4.0         
##  [88] fBasics_4041.97         pillar_1.11.1           vctrs_0.6.5            
##  [91] gplots_3.2.0            slam_0.1-55             dbplyr_2.5.1           
##  [94] xtable_1.8-4            evaluate_1.0.5          mvtnorm_1.3-3          
##  [97] cli_3.6.5               compiler_4.4.0          crayon_1.5.3           
## [100] rngtools_1.5.2          future.apply_1.20.0     labeling_0.4.3         
## [103] rstan_2.32.7            QuickJSR_1.8.1          viridisLite_0.4.2      
## [106] lazyeval_0.2.2          fontquiver_0.2.1        Matrix_1.7-0           
## [109] hms_1.1.4               bit64_4.6.0-1           future_1.68.0          
## [112] nixtlar_0.6.2           extraDistr_1.10.0       igraph_2.2.1           
## [115] RcppParallel_5.1.11-1   bslib_0.9.0             quantmod_0.4.28        
## [118] bit_4.6.0
#save.image("__analisis.RData")

sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '50%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",#;
        "}")))